/*+**************************************************************************/
/***                                                                      ***/
/***   This file is distributed under a BSD license.                      ***/
/***   See LICENSE.txt for details.                                       ***/
/***                                                                      ***/
/**************************************************************************+*/

// Constant buffers and permutation sets shared by the post-processing
// materials in this file. Every cbuffer here is declared on register c0 of
// its shader stage; a shader selects the one it needs with 'use'.
asc
{
  // Vertex-stage parameters: the matrix the vertex shaders apply as
  // mul(float4(pos,1),mvp).
  cbuffer Wz4IppVSPara : register(c0) : slot vs 0
  {
    row_major float4x4 mvp;
  };
  // Generic pixel-stage parameters. The four vectors carry no fixed meaning;
  // each material interprets them on its own (weights, offsets, scales...).
  cbuffer Wz4IppPSPara : register(c0) : slot ps 0
  {
    float4 Color0;
    float4 Color1;
    float4 Color2;
    float4 Color3;
  };
  
  // Pixel-stage parameters read by Wz4IppColorGrade.
  cbuffer Wz4IppColorGradePSPara : register(c0) : slot ps 0
  {
    float4 Params1;         // x scales lum for the shadow weight, y scales (1-lum) for the highlight weight
    float4 Gamma;           // .rgb: per-channel exponent applied when UseGamma is set
    float4 LumWeights;      // dotted with (r,g,b,1) to obtain the luminance
    float4 VignetteMatrix;  // only referenced by the disabled vignette code
    float4 VignetteShift;   // only referenced by the disabled vignette code
    
    // Rows dotted with (r,g,b,1): S = shadows, M = midtones, H = highlights;
    // the r/g/b suffix names the output channel.
    float4 MatrixSr;
    float4 MatrixSg;
    float4 MatrixSb;
    float4 MatrixMr;
    float4 MatrixMg;
    float4 MatrixMb;
    float4 MatrixHr;
    float4 MatrixHg;
    float4 MatrixHb;
  };
  
  // Compile-time switches of the Wz4IppColorGrade pixel shader.
  permute Wz4IppColorGradePSPerm
  {
    UseSMH;     // blend shadow/mid/highlight matrices instead of the mid matrix only
    UseGamma;   // apply the Gamma exponent
  };
  
  // NOTE(review): not referenced in the part of the file visible here;
  // presumably consumed by the FXAA3 materials further down - confirm.
  cbuffer Wz4IppFXAA3Para : register (c0) : slot ps 0
  {
    float4 rcpFrame;
  };

  // Vertex-stage parameters for custom shaders.
  // NOTE(review): not referenced in the part of the file visible here.
  cbuffer Wz4IppVSCustomPara : register(c0) : slot vs 0
  {
    row_major float4x4 mvp;
    row_major float4x4 mv;
    float4 eye;
    float4 vs_var0;
    float4 vs_var1;
    float4 vs_var2;
    float4 vs_var3;
    float4 vs_var4;
  };

  // Pixel-stage counterpart of Wz4IppVSCustomPara, with an extra resolution.
  // NOTE(review): not referenced in the part of the file visible here.
  cbuffer Wz4IppPSCustomPara : register (c0) : slot ps 0
  {
    row_major float4x4 mvp;
    row_major float4x4 mv;
    float4 eye;
    float4 ps_var0;
    float4 ps_var1;
    float4 ps_var2;
    float4 ps_var3;
    float4 ps_var4;
    float2 resolution;
  };
}

/****************************************************************************/

// Copies the source texture with a per-channel scale and bias:
// result = texel * Color0 + Color1.
material Wz4IppCopy
{
  vs asc vs_3_0
  {
    use Wz4IppVSPara;
    void main
    (
      in float3 in_pos : POSITION,
      in float4 in_col : COLOR0,
      in float2 in_uv0 : TEXCOORD0,
      out float2 out_uv0 : TEXCOORD0,
      out float4 out_pos : POSITION,
    )
    {
      // in_col is accepted but not used.
      float4 pos4 = float4(in_pos,1);
      out_pos = mul(pos4,mvp);
      out_uv0 = in_uv0;
    }
  }
  ps asc ps_3_0
  {
    use Wz4IppPSPara;
    sampler2D s0 : register(s0);
    void main
    (
      in float2 uv0 : TEXCOORD0,
      out float4 r : COLOR0,
    )
    {
      float4 texel = tex2D(s0,uv0);
      r = texel * Color0 + Color1;
    }
  }
};

/****************************************************************************/

// Combines two textures sampled at the same coordinate:
// result = (tex(s0) - tex(s1) * Color0) * Color1.
material Wz4IppSharpen
{
  vs asc vs_3_0
  {
    use Wz4IppVSPara;
    void main
    (
      in float3 in_pos : POSITION,
      in float4 in_col : COLOR0,
      in float2 in_uv0 : TEXCOORD0,
      out float2 out_uv0 : TEXCOORD0,
      out float4 out_pos : POSITION,
    )
    {
      // Pass the texture coordinate through and transform the position.
      out_pos = mul(float4(in_pos,1),mvp);
      out_uv0 = in_uv0;
    }
  }
  ps asc ps_3_0
  {
    use Wz4IppPSPara;
    sampler2D s0 : register(s0);
    sampler2D s1 : register(s1);
    void main
    (
      in float2 uv0 : TEXCOORD0,
      out float4 r : COLOR0,
    )
    {
      float4 first = tex2D(s0,uv0);
      float4 second = tex2D(s1,uv0);
      float4 diff = first - second * Color0;
      r = diff * Color1;
    }
  }
};

/****************************************************************************/

// Debug view: replicates the x channel of s0 into rgb, tinted by Color0.xyz,
// with alpha forced to 1.
material Wz4IppDebugZ
{
  vs asc vs_3_0
  {
    use Wz4IppVSPara;
    void main
    (
      in float3 in_pos : POSITION,
      in float4 in_col : COLOR0,
      in float2 in_uv0 : TEXCOORD0,
      out float2 out_uv0 : TEXCOORD0,
      out float4 out_pos : POSITION,
    )
    {
      float4 pos4 = float4(in_pos,1);
      out_uv0 = in_uv0;
      out_pos = mul(pos4,mvp);
    }
  }
  ps asc ps_3_0
  {
    use Wz4IppPSPara;
    sampler2D s0 : register(s0);
    void main
    (
      in float2 uv0 : TEXCOORD0,
      out float4 r : COLOR0,
    )
    {
      float4 texel = tex2D(s0,uv0);
      r.w = 1;
      r.xyz = texel.xxx*Color0.xyz;
    }
  }
};
// Debug view: shows the yzw channels of s0 remapped by *0.5+0.5 as rgb,
// with alpha forced to 1.
material Wz4IppDebugNormal
{
  vs asc vs_3_0
  {
    use Wz4IppVSPara;
    void main
    (
      in float3 in_pos : POSITION,
      in float4 in_col : COLOR0,
      in float2 in_uv0 : TEXCOORD0,
      out float2 out_uv0 : TEXCOORD0,
      out float4 out_pos : POSITION,
    )
    {
      out_pos = mul(float4(in_pos,1),mvp);
      out_uv0 = in_uv0;
    }
  }
  ps asc ps_3_0
  {
    use Wz4IppPSPara;
    sampler2D s0 : register(s0);
    void main
    (
      in float2 uv0 : TEXCOORD0,
      out float4 r : COLOR0,
    )
    {
      float3 packed = tex2D(s0,uv0).yzw;
      r.w = 1;
      r.xyz = packed*0.5+0.5;
    }
  }
};
// Debug view: shows the w channel of s0 as a grey value, alpha forced to 1.
material Wz4IppDebugAlpha
{
  vs asc vs_3_0
  {
    use Wz4IppVSPara;
    void main
    (
      in float3 in_pos : POSITION,
      in float4 in_col : COLOR0,
      in float2 in_uv0 : TEXCOORD0,
      out float2 out_uv0 : TEXCOORD0,
      out float4 out_pos : POSITION,
    )
    {
      float4 pos4 = float4(in_pos,1);
      out_pos = mul(pos4,mvp);
      out_uv0 = in_uv0;
    }
  }
  ps asc ps_3_0
  {
    use Wz4IppPSPara;
    sampler2D s0 : register(s0);
    void main
    (
      in float2 uv0 : TEXCOORD0,
      out float4 r : COLOR0,
    )
    {
      float4 texel = tex2D(s0,uv0);
      r.w = 1;
      r.xyz = texel.www;
    }
  }
};

/****************************************************************************/

// Nine-tap filter along a line. Taps sit at uv0 + k*Color2.zw for k = -4..4;
// their weights are Color0.xyzw, Color1.xyzw and Color2.x in that order.
material Wz4IppSampleLine9
{
  vs asc vs_3_0
  {
    use Wz4IppVSPara;
    void main
    (
      in float3 in_pos : POSITION,
      in float4 in_col : COLOR0,
      in float2 in_uv0 : TEXCOORD0,
      out float2 out_uv0 : TEXCOORD0,
      out float4 out_pos : POSITION,
    )
    {
      out_pos = mul(float4(in_pos,1),mvp);
      out_uv0 = in_uv0;
    }
  }
  ps asc ps_3_0
  {
    use Wz4IppPSPara;
    sampler2D s0 : register(s0);
    void main
    (
      in float2 uv0 : TEXCOORD0,
      out float4 r : COLOR0,
    )
    {
      // Offset between two neighbouring taps.
      float2 delta = Color2.zw;

      // Accumulate in the same left-to-right order as a single sum.
      r  = tex2D(s0,uv0.xy-4*delta)*Color0.x;
      r += tex2D(s0,uv0.xy-3*delta)*Color0.y;
      r += tex2D(s0,uv0.xy-2*delta)*Color0.z;
      r += tex2D(s0,uv0.xy-1*delta)*Color0.w;
      r += tex2D(s0,uv0.xy+0*delta)*Color1.x;
      r += tex2D(s0,uv0.xy+1*delta)*Color1.y;
      r += tex2D(s0,uv0.xy+2*delta)*Color1.z;
      r += tex2D(s0,uv0.xy+3*delta)*Color1.w;
      r += tex2D(s0,uv0.xy+4*delta)*Color2.x;
    }
  }
};

/****************************************************************************/

// 3x3 filter. Taps are spaced Color2.z horizontally and Color2.w vertically
// around uv0; weights run row by row through Color0.xyzw, Color1.xyzw, Color2.x.
material Wz4IppSampleRect9
{
  vs asc vs_3_0
  {
    use Wz4IppVSPara;
    void main
    (
      in float3 in_pos : POSITION,
      in float4 in_col : COLOR0,
      in float2 in_uv0 : TEXCOORD0,
      out float2 out_uv0 : TEXCOORD0,
      out float4 out_pos : POSITION,
    )
    {
      float4 pos4 = float4(in_pos,1);
      out_uv0 = in_uv0;
      out_pos = mul(pos4,mvp);
    }
  }
  ps asc ps_3_0
  {
    use Wz4IppPSPara;
    sampler2D s0 : register(s0);
    void main
    (
      in float2 uv0 : TEXCOORD0,
      out float4 r : COLOR0,
    )
    {
      // Column and row coordinates of the 3x3 grid.
      float xl = uv0.x-Color2.z;
      float xm = uv0.x;
      float xr = uv0.x+Color2.z;
      float yt = uv0.y-Color2.w;
      float ym = uv0.y;
      float yb = uv0.y+Color2.w;

      // Row above.
      r  = tex2D(s0,float2(xl,yt))*Color0.x;
      r += tex2D(s0,float2(xm,yt))*Color0.y;
      r += tex2D(s0,float2(xr,yt))*Color0.z;
      // Centre row.
      r += tex2D(s0,float2(xl,ym))*Color0.w;
      r += tex2D(s0,float2(xm,ym))*Color1.x;
      r += tex2D(s0,float2(xr,ym))*Color1.y;
      // Row below.
      r += tex2D(s0,float2(xl,yb))*Color1.z;
      r += tex2D(s0,float2(xm,yb))*Color1.w;
      r += tex2D(s0,float2(xr,yb))*Color2.x;
    }
  }
};

/****************************************************************************/

// Sixteen-tap filter with free tap positions.
//   Offa[i].xy / Offa[i].zw / Offb[i].xy / Offb[i].zw are the UV offsets of
//   taps 4i..4i+3, and Color[i].xyzw are their weights.
// Permutation AlphaWeighted:
//   off - plain weighted sum of all four channels.
//   on  - each tap's weight is multiplied by the tap's alpha before the rgb
//         is accumulated, and rgb is divided by the accumulated weight at the
//         end. The tap at Offa[1].zw is the exception: it is added with its
//         plain weight, and the part of its weight not covered by alpha
//         ("mids") is added to the divisor.
material Wz4IppSampleRect16
{
  vs asc vs_3_0
  {
    use Wz4IppVSPara;
    void main
    (
      in float3 in_pos : POSITION,
      in float4 in_col : COLOR0,
      in float2 in_uv0 : TEXCOORD0,
      out float2 out_uv0 : TEXCOORD0,
      out float4 out_pos : POSITION,
    )
    {
      out_uv0 = in_uv0;
      out_pos = mul(float4(in_pos,1),mvp);
    }
  }
  ps asc ps_3_0
  {
    cbuffer Wz4IppSampleRect16PS : register(c0) : slot ps 0
    {
      float4 Color[4];    // tap weights, four per vector
      float4 Offa[4];     // UV offsets of taps 4i (.xy) and 4i+1 (.zw)
      float4 Offb[4];     // UV offsets of taps 4i+2 (.xy) and 4i+3 (.zw)
    };
    permute Wz4IppSampleRect16PSPerm
    {
      AlphaWeighted;
    };

    use Wz4IppSampleRect16PS;
    use Wz4IppSampleRect16PSPerm;

    sampler2D s0 : register(s0);
    void main
    (
      in float2 uv0 : TEXCOORD0,
      out float4 r : COLOR0,
    )
    {
      pif(!AlphaWeighted)
      {
        r = tex2D(s0,uv0 + Offa[0].xy)*Color[0].x
          + tex2D(s0,uv0 + Offa[0].zw)*Color[0].y
          + tex2D(s0,uv0 + Offb[0].xy)*Color[0].z
          + tex2D(s0,uv0 + Offb[0].zw)*Color[0].w

          + tex2D(s0,uv0 + Offa[1].xy)*Color[1].x
          + tex2D(s0,uv0 + Offa[1].zw)*Color[1].y
          + tex2D(s0,uv0 + Offb[1].xy)*Color[1].z
          + tex2D(s0,uv0 + Offb[1].zw)*Color[1].w

          + tex2D(s0,uv0 + Offa[2].xy)*Color[2].x
          + tex2D(s0,uv0 + Offa[2].zw)*Color[2].y
          + tex2D(s0,uv0 + Offb[2].xy)*Color[2].z
          + tex2D(s0,uv0 + Offb[2].zw)*Color[2].w

          + tex2D(s0,uv0 + Offa[3].xy)*Color[3].x
          + tex2D(s0,uv0 + Offa[3].zw)*Color[3].y
          + tex2D(s0,uv0 + Offb[3].xy)*Color[3].z
          + tex2D(s0,uv0 + Offb[3].zw)*Color[3].w;
      }
      pelse
      {
        // 'tap' rather than 'sample': the latter is a reserved HLSL keyword.
        float4 tap;
        float mids;

        // For a regular tap: tap.w becomes weight*alpha, rgb is accumulated
        // scaled by it, and r.w collects the effective weights.
        tap = tex2D(s0,uv0 + Offa[0].xy);
        tap.w *= Color[0].x; r.rgb  = tap.rgb * tap.w; r.w  = tap.w;
        tap = tex2D(s0,uv0 + Offa[0].zw);
        tap.w *= Color[0].y; r.rgb += tap.rgb * tap.w; r.w += tap.w;
        tap = tex2D(s0,uv0 + Offb[0].xy);
        tap.w *= Color[0].z; r.rgb += tap.rgb * tap.w; r.w += tap.w;
        tap = tex2D(s0,uv0 + Offb[0].zw);
        tap.w *= Color[0].w; r.rgb += tap.rgb * tap.w; r.w += tap.w;

        tap = tex2D(s0,uv0 + Offa[1].xy);
        tap.w *= Color[1].x; r.rgb += tap.rgb * tap.w; r.w += tap.w;

        // Special tap: added with its plain weight on all four channels;
        // 'mids' keeps weight*(1-alpha) so the divisor sees the full weight.
        tap = tex2D(s0,uv0 + Offa[1].zw);
        r += Color[1].y * tap; mids = Color[1].y * (1-tap.w);

        tap = tex2D(s0,uv0 + Offb[1].xy);
        tap.w *= Color[1].z; r.rgb += tap.rgb * tap.w; r.w += tap.w;
        tap = tex2D(s0,uv0 + Offb[1].zw);
        tap.w *= Color[1].w; r.rgb += tap.rgb * tap.w; r.w += tap.w;

        tap = tex2D(s0,uv0 + Offa[2].xy);
        tap.w *= Color[2].x; r.rgb += tap.rgb * tap.w; r.w += tap.w;
        tap = tex2D(s0,uv0 + Offa[2].zw);
        tap.w *= Color[2].y; r.rgb += tap.rgb * tap.w; r.w += tap.w;
        tap = tex2D(s0,uv0 + Offb[2].xy);
        tap.w *= Color[2].z; r.rgb += tap.rgb * tap.w; r.w += tap.w;
        tap = tex2D(s0,uv0 + Offb[2].zw);
        tap.w *= Color[2].w; r.rgb += tap.rgb * tap.w; r.w += tap.w;

        tap = tex2D(s0,uv0 + Offa[3].xy);
        tap.w *= Color[3].x; r.rgb += tap.rgb * tap.w; r.w += tap.w;
        tap = tex2D(s0,uv0 + Offa[3].zw);
        tap.w *= Color[3].y; r.rgb += tap.rgb * tap.w; r.w += tap.w;
        tap = tex2D(s0,uv0 + Offb[3].xy);
        tap.w *= Color[3].z; r.rgb += tap.rgb * tap.w; r.w += tap.w;
        tap = tex2D(s0,uv0 + Offb[3].zw);
        tap.w *= Color[3].w; r.rgb += tap.rgb * tap.w; r.w += tap.w;

        // Normalise rgb; r.w + mids is at least the special tap's weight.
        r.rgb *= 1.0f / (r.w + mids);
      }
    }
  }
  header
  {
    sBool AlphaWeighted;    // selects the alpha-weighted pixel shader permutation
  }
  new
  {
    AlphaWeighted = sFALSE;
  }
  prepare
  {
    VertexShader = VS();
    PixelShader = PS(AlphaWeighted);
  }
};

/****************************************************************************/

// Depth-of-field composite from three colour inputs and a depth texture.
// A blend factor f is derived from depth by curve():
//   f in [0,0.5) blends blur0 -> blur1, f in [0.5,1] blends blur1 -> blur2.
// TEXCOORD0.xy addresses blur0 and the depth texture; TEXCOORD0.zw plus
// Color3.xy / Color3.zw addresses blur1 / blur2.
// The Debug permutation replaces the result with one of the inputs.
material Wz4IppSampleDof
{
  vs asc vs_3_0
  {
    use Wz4IppVSPara;
    void main
    (
      in float3 in_pos : POSITION,
      in float4 in_col : COLOR0,
      in float2 in_uv0 : TEXCOORD0,
      in float2 in_uv1 : TEXCOORD1,
      out float4 out_uv0 : TEXCOORD0,
      out float4 out_pos : POSITION,
    )
    {
      // Both coordinate sets travel in one interpolator: uv0 in .xy, uv1 in .zw.
      out_uv0 = float4(in_uv0,in_uv1);
      out_pos = mul(float4(in_pos,1),mvp);
    }
  }
  ps asc ps_3_0
  {
    permute Wz4IppSamplerDofPSPerm
    {
      Debug {DebugNormal,DebugScreen,DebugBlur0,DebugBlur1,DebugZ,DebugFade};
    };
    use Wz4IppPSPara;
    use Wz4IppSamplerDofPSPerm;

    sampler2D blur0 : register(s0);
    sampler2D blur1 : register(s1);
    sampler2D blur2 : register(s2);
    sampler2D zbuffer: register(s3);


    // Ramp shaping. Currently the identity; the smoothstep form
    // 3*t*t-2*t*t*t is disabled.
    float mystep(float t)
    {
      return t;
    }

    // Piecewise blend factor over depth z.
    //   zones  = depth limits of the five segments (x < y < z < w expected)
    //   levels = constant values: x very near, y focus centre, w very far;
    //            x and z are also the start values of the two ramps
    //   ramps  = near ramp: input scale .y, output scale .x
    //            far ramp:  input scale .w, output scale .z
    // Parameter names differ from the cbuffer members on purpose so they do
    // not shadow Color0..Color2.
    float curve(float z,float4 zones,float4 levels,float4 ramps)
    {
      float f;
      if(z<zones.x)         // very near
        f = levels.x;
      else if(z<zones.y)    // near
        f = mystep((z-zones.x)*ramps.y)*ramps.x+levels.x;
      else if(z<zones.z)    // focus center
        f = levels.y;
      else if(z<zones.w)    // far
        f = mystep((z-zones.z)*ramps.w)*ramps.z+levels.z;
      else                  // very far
        f = levels.w;
      return f;
    }

    void main
    (
      in float4 uv0 : TEXCOORD0,
      out float4 r : COLOR0,
    )
    {
      float2 uvfull = uv0.xy;
      float2 uvblur1 = uv0.zw+Color3.xy;
      float2 uvblur2 = uv0.zw+Color3.zw;

      // Depth is in the x channel; select it explicitly instead of relying
      // on implicit float4 -> float truncation.
      float z = tex2D(zbuffer,uvfull).x;
      float f = curve(z,Color0,Color1,Color2);

      if(f<0.5)
        r = lerp(tex2D(blur0,uvfull),tex2D(blur1,uvblur1),f*2);
      else
        r = lerp(tex2D(blur1,uvblur1),tex2D(blur2,uvblur2),f*2-1);

      // Debug overrides. Note the naming offset: DebugScreen shows sampler
      // blur0, DebugBlur0 shows blur1 and DebugBlur1 shows blur2.
      // NOTE(review): these use plain 'if' on a permutation value, whereas
      // Wz4IppSampleDof2 uses 'pif' - confirm this is intended.
      if(Debug==DebugScreen) r = tex2D(blur0,uvfull);
      if(Debug==DebugBlur0) r = tex2D(blur1,uvblur1);
      if(Debug==DebugBlur1) r = tex2D(blur2,uvblur2);
      if(Debug==DebugZ) r = tex2D(zbuffer,uvfull).xxxx;
      if(Debug==DebugFade) r = f.xxxx;
    }
  }

  header
  {
    sInt Debug;   // index into the Debug permutation; 0 = normal output
  }
  new
  {
    Debug = 0;
  }
  prepare
  {
    VertexShader = VS();
    PixelShader = PS(Debug);
  }
};

/****************************************************************************/

// Depth-of-field composite that blends four blur levels (original, small,
// medium, large) by a circle-of-confusion (CoC) value.
//   TEXCOORD0.xy = full-resolution UV, TEXCOORD0.zw = downsampled UV.
//   The small blur is built in place from four taps at Color2.xy, Color2.zw,
//   Color3.xy and Color3.zw around the full-resolution UV.
// The Debug permutation replaces the output with an intermediate value.
material Wz4IppSampleDof2
{
  vs asc vs_3_0
  {
    use Wz4IppVSPara;
    void main
    (
      in float3 in_pos : POSITION,
      in float4 in_col : COLOR0,
      in float2 in_uv0 : TEXCOORD0,
      in float2 in_uv1 : TEXCOORD1,
      out float4 out_uv0 : TEXCOORD0,
      out float4 out_pos : POSITION,
    )
    {
      out_uv0 = float4(in_uv0,in_uv1);
      out_pos = mul(float4(in_pos,1),mvp);
    }
  }
  ps asc ps_3_0
  {
    // Color0.xyzw = { projZ.xy,focusZ,focusRange }
    // Color1.x = dofAmount/(noFocus-focusRange)

    permute Wz4IppSamplerDof2PSPerm
    {
      Debug {DebugNone,DebugCOC,DebugSmb,DebugMeb,DebugLab};
    };
    use Wz4IppPSPara;
    use Wz4IppSamplerDof2PSPerm;

    sampler2D fullscreen : register(s0);
    sampler2D medblur : register(s1);
    sampler2D down4 : register(s2);
    sampler2D zbuffer : register(s3);
    sampler2D zbuffer4 : register(s4);

    // CoC for depth z: distance from Color0.z, minus Color0.w, scaled by
    // Color1.x and clamped to [0,1].
    float circleOfConfusion(float z)
    {
      float focalDist = abs(z - Color0.z);
      return saturate((focalDist - Color0.w) * Color1.x);
    }
/*
    half4 getSmallBlur(sampler2D s, half2 uv)
    {
      half4 sum;
      sum  = tex2D(s,uv + Color2.xy);
      sum += tex2D(s,uv + Color2.zw);
      sum += tex2D(s,uv + Color3.xy);
      sum += tex2D(s,uv + Color3.zw);
      return sum * (4.0f / 17.0f);
    }
*/
    void main
    (
      in float4 uv0 : TEXCOORD0,
      out float4 r : COLOR0,
    )
    {
      float2 uvfull = uv0.xy;
      float2 uvdown = uv0.zw;

      // z samples, circle of confusion
      float zFull = tex2D(zbuffer,uvfull).x;
      float zDown = tex2D(zbuffer4,uvdown).x;
      float4 largeBlur = tex2D(down4,uvdown);   // rgb = large blur, a = its CoC
      half origCoC = circleOfConfusion(zFull);
      half CoC = largeBlur.a;
      // Where the downsampled depth lies behind the full-resolution depth,
      // use the CoC computed from the full-resolution depth instead.
      if(zDown > zFull)
        CoC = origCoC;
      //half CoC = lerp(largeBlur.a,origCoC,saturate((zDown - zFull) - 0.5));

      // weights
      half d0 = 0.50h;
      half d1 = 0.25h;
      half d2 = 0.25h;
      half4 weights = saturate(CoC * half4(-1/d0,-1/d1,-1/d2,1/d2)
          + half4(1,(1-d2)/d1,1/d2,(d2-1)/d2));
      weights.yz = min(weights.yz,1-weights.xy);

      float4 cOrig  = tex2D(fullscreen,uvfull);
      half4 cSmall;// = getSmallBlur(fullscreen,uvfull);   // asc can't pass samplers in dx11
      {
        half4 sum;
        sum  = tex2D(fullscreen,uvfull + Color2.xy);
        sum += tex2D(fullscreen,uvfull + Color2.zw);
        sum += tex2D(fullscreen,uvfull + Color3.xy);
        sum += tex2D(fullscreen,uvfull + Color3.zw);
        cSmall = sum * (4.0f / 17.0f);
      }

      half3 cMed   = tex2D(medblur,uvfull).rgb;
      // Reuse the down4 texel fetched above instead of sampling it again.
      half3 cLarge = largeBlur.rgb;

      // Swizzles are spelled out so nothing depends on implicit truncation.
      r.rgb = weights.y * cSmall.rgb + weights.z * cMed + weights.w * cLarge;
      r.a = cOrig.a;
      // The original image receives whatever weight the blurs leave over.
      r.rgb += cOrig.rgb*(1.0-saturate(dot(weights.yzw,half3(16.0f/17.0f,1.0f,1.0f))));
      
      pif(Debug==DebugCOC) r = CoC;
      pif(Debug==DebugSmb) r = cSmall;
      pif(Debug==DebugMeb) r.rgb = cMed;
      pif(Debug==DebugLab) r.rgb = cLarge;
    }
  }

  header
  {
    sInt Debug;   // index into the Debug permutation; 0 = DebugNone
  }
  new
  {
    Debug = 0;
  }
  prepare
  {
    VertexShader = VS();
    PixelShader = PS(Debug);
  }
};

// Writes the colour of 'fullscreen' to rgb and the circle of confusion
// computed from 'zbuffer' to alpha. Both are sampled at TEXCOORD0.xy.
material Wz4IppCoCDof2
{
  vs asc vs_3_0
  {
    use Wz4IppVSPara;
    void main
    (
      in float3 in_pos : POSITION,
      in float4 in_col : COLOR0,
      in float2 in_uv0 : TEXCOORD0,
      in float2 in_uv1 : TEXCOORD1,
      out float4 out_uv0 : TEXCOORD0,
      out float4 out_pos : POSITION,
    )
    {
      out_uv0 = float4(in_uv0,in_uv1);
      out_pos = mul(float4(in_pos,1),mvp);
    }
  }
  ps asc ps_3_0
  {
    // Color0.xyzw = { projZ.xy,focusZ,focusRange }
    // Color1.x = dofAmount/(noFocus-focusRange)
    use Wz4IppPSPara;

    sampler2D fullscreen : register(s0);
    sampler2D zbuffer : register(s1);

    // Distance of z from Color0.z, minus Color0.w, scaled by Color1.x and
    // clamped to [0,1].
    float circleOfConfusion(float z)
    {
      float focalDist = abs(z - Color0.z);
      return saturate((focalDist - Color0.w) * Color1.x);
    }

    void main
    (
      in float4 uv0 : TEXCOORD0,
      out float4 r : COLOR0,
    )
    {
      // Only the .xy coordinate set is needed here; .zw is ignored.
      float2 uvfull = uv0.xy;

      // Explicit swizzles replace the implicit float4 truncations.
      r.rgb = tex2D(fullscreen,uvfull).rgb;
      r.w = circleOfConfusion(tex2D(zbuffer,uvfull).x);
    }
  }
};

/****************************************************************************/

// Pixel-stage constants of the Wz4IppSSAOMain shader.
asc
{
  cbuffer Wz4IppSSAOPSPara : register(c0) : slot ps 0
  {
    float3 projScale;         // .xy scales pos.xy/pos.z when projecting a point back to UV
    float3 invProjScale;      // scales (uv-0.5,1)*z when reconstructing a position; .z also scales sampled depth
    float4 occludeAndRadius; // (minOcclude,maxOcclude,sampleRadius,intensity)
    float4 uvStep;            // .xy: half of it is added to 0.5 as the UV offset of projected samples
    float4 FogMinMaxDens;     // x: start distance, y: distance scale, z: occlusion multiplier, w: fade strength
    float3 FogCenter;         // point the fog distance is measured from
    float4 sampleVec[24];     // sample directions (.xyz); the shader reads the first 4, 8 or 16

    // Fills projScale and invProjScale from the viewport's ZoomX, ZoomY and
    // ClipFar. All other members are left untouched and must be set by the
    // caller.
    extern void Set(const sViewport &view)
    {
      projScale.x =  view.ZoomX / 2.0f;
      projScale.y = -view.ZoomY / 2.0f;
      projScale.z = 1.0f;
      
      invProjScale.x =  2.0f / view.ZoomX;
      invProjScale.y = -2.0f / view.ZoomY;
      invProjScale.z = 1.0f;
      // After this, invProjScale.z equals ClipFar.
      invProjScale *= view.ClipFar;
    }
  };
}

// Screen-space ambient occlusion pass.
// Output: rgb = normalised normal packed as n*0.5+0.5, a = 1 - occlusion.
// Pixels whose stored depth is >= 1, or whose fog fade is <= 0, get the
// constant (0.5,0.5,0,1).
// Permutations: Samples selects 4/8/16 occlusion taps, Fog enables a
// distance-based fade of the effect.
material Wz4IppSSAOMain
{
  vs asc vs_3_0
  {
    use Wz4IppVSPara;
    void main
    (
      in float3 in_pos : POSITION,
      in float4 in_col : COLOR0,
      in float2 in_uv0 : TEXCOORD0,
      in float2 in_uv1 : TEXCOORD1,
      out float4 out_uv0 : TEXCOORD0,
      out float4 out_pos : POSITION,
    )
    {
      float4 pos4 = float4(in_pos,1);
      out_pos = mul(pos4,mvp);
      out_uv0 = float4(in_uv0,in_uv1);
    }
  }
  ps asc ps_3_0
  {
    use Wz4IppSSAOPSPara;

    sampler2D zbuffer : register(s0);
    sampler2D zbuffull : register(s1);
    sampler2D randoms : register(s2);

    permute Wz4IPPSSAOMainPerm
    {
      Samples { Samples4, Samples8, Samples16 };
      Fog;
    };

    use Wz4IPPSSAOMainPerm;

    // Position for a UV in [0,1] and a depth value.
    float3 reconstruct(float2 uv,float depth)
    {
      float3 ray = float3(uv.xy-0.5,1.0);
      return invProjScale * ray * depth;
    }

    // UV of a position; 'centre' is added after the perspective divide.
    float2 project(float3 p,float2 centre)
    {
      float2 ndc = p.xy / p.z;
      return projScale.xy * ndc + centre;
    }

    // Occlusion contribution for a depth difference d: nothing at or below
    // occludeAndRadius.x, otherwise 1/d^2 - 1/max^2 clamped to [0,1].
    float occlusionFunction(float d)
    {
      float occ;
      if(d <= occludeAndRadius.x)
        occ = 0.0;
      else
        occ = saturate(1.0 / (d*d) - 1.0 / (occludeAndRadius.y*occludeAndRadius.y));

      return occ;
    }

    void main
    (
      in float4 pos01 : TEXCOORD0,            
      in float4 posxy : VPOS,      
      out float4 result : COLOR0,
    )
    {
      float2 uvCentre = 0.5 + 0.5 * uvStep.xy;
      int tapCount;

      pif(Samples == Samples4)
        tapCount = 4;
      pelse pif(Samples == Samples8)
        tapCount = 8;
      pelse pif(Samples == Samples16)
        tapCount = 16;

      // One fetch yields both the depth (.r) and the normal (.yzw).
      float4 geo = tex2D(zbuffull,pos01.zw);
      float z = geo.r;
      if(z < 1.0)
      {
        float3 viewPos = reconstruct(pos01.zw,z);

        float fade = 1;
        pif(Fog)
        {
          float dist = length(viewPos-FogCenter);
          fade = saturate((dist-FogMinMaxDens.x)*FogMinMaxDens.y);
          fade = 1-(1-fade)*FogMinMaxDens.w;
        }
        if(fade>0)
        {
          half3 normal = geo.yzw;
          // The random texture is addressed by pixel position / 4.
          half3 random = tex2D(randoms,posxy.xy/4).xyz;

          // The radius grows with depth but never drops below the value at depth 8.
          float radius = max(viewPos.z,8.0) * occludeAndRadius.z / 8.0;

          float occlusionSum = 0;

          for(int i=0;i<tapCount;i++)
          {
            half3 dir = sampleVec[i].xyz;
            dir = reflect(random, dir);
            // Flip directions that point away from the normal.
            if(dot(dir,normal) < 0)
              dir = -dir;

            // pick occlusion sample
            float3 tapPos = viewPos + radius * dir;
            float2 tapUV = project(tapPos, uvCentre);
            float tapZ = tex2Dlod(zbuffer,float4(tapUV,0,0)).r * invProjScale.z;
            occlusionSum += occlusionFunction(tapPos.z - tapZ);
          }
          result.rgb = normalize(normal) * 0.5 + 0.5;
          result.a = 1 - (occlusionSum / tapCount)*(fade*FogMinMaxDens.z);
        }
        else
          result = float4(0.5,0.5,0.0,1);
      }
      else
        result = float4(0.5,0.5,0.0,1);
    }
  }

  header
  {
    sInt SampleLevel; // 0=4 samples, 1=8 samples, 2=16 samples
    sInt FogMode;     // non-zero selects the Fog permutation
  }
  new
  {
    SampleLevel = 0;
    FogMode = 0;
  }
  prepare
  {
    sInt perm = SampleLevel;
    if(FogMode)
      perm |= Wz4IPPSSAOMainPermMask_Fog;

    VertexShader = VS();
    PixelShader = PS(perm);
  }
};

// Filters the SSAO texture with sixteen taps and applies it to the screen.
// The 'ssao' texture holds a packed normal in xyz (n*0.5+0.5) and the
// occlusion term in w. Each tap is weighted by how well its normal agrees
// with the centre normal.
// Output permutation:
//   OutputResult  - lerp(ScrColor, screen, ssao), alpha from screen
//   OutputNormals - the unpacked centre texel of the ssao texture
//   OutputSSAO    - the filtered occlusion term on all channels
material Wz4IppSSAOFinish
{
  vs asc vs_3_0
  {
    use Wz4IppVSPara;
    void main
    (
      in float3 in_pos : POSITION,
      in float4 in_col : COLOR0,
      in float2 in_uv0 : TEXCOORD0,
      in float2 in_uv1 : TEXCOORD1,
      out float4 out_uv0 : TEXCOORD0,
      out float4 out_pos : POSITION,
    )
    {
      out_uv0 = float4(in_uv0,in_uv1);
      out_pos = mul(float4(in_pos,1),mvp);
    }
  }
  ps asc ps_3_0
  {
    cbuffer Wz4IppSSAOFinishPSPara : register(c0) : slot ps 0
    {
      float4 Color[4];    // tap weights, four per vector
      float4 Offa[4];     // UV offsets of taps 4i (.xy) and 4i+1 (.zw)
      float4 Offb[4];     // UV offsets of taps 4i+2 (.xy) and 4i+3 (.zw)
      float4 Tweak;       // y: scale of the normal agreement, z: bias added to it
      float4 ScrColor;    // .xyz: colour shown where the occlusion term is 0
    };

    permute Wz4IPPSSAOFinishPerm
    {
      Output { OutputResult,OutputNormals,OutputSSAO };
    };

    use Wz4IppSSAOFinishPSPara;
    use Wz4IPPSSAOFinishPerm;

    sampler2D screen : register(s0);
    sampler2D ssao : register(s1);

    // Reads one tap and returns (occlusion*w, w), where w is 'weight' scaled
    // by saturate(dot(middot,(packed normal,1))).
    // The local is called 'tap' because 'sample' is a reserved HLSL keyword.
    float2 doSample(float2 uv,float4 middot,float weight)
    {
      float4 tap = tex2D(ssao,uv);
      return saturate(dot(middot,float4(tap.xyz,1))) * weight * float2(tap.w,1);
    }

    void main
    (
      in float2 uv0 : TEXCOORD0,
      out float4 result : COLOR0,
    )
    {
      // Centre texel with the normal unpacked to [-1,1].
      float4 mid = tex2D(ssao,uv0);
      mid.xyz = mid.xyz*2-1;

      // we need to compute sample.xyz*2-1 for all the samples, then dot it with mid.xyz. it pays
      // to precalc a bit here.
      float4 middot;
      middot.xyz = mid.xyz * Tweak.y * 2; // the *2 part
      middot.w = Tweak.z - Tweak.y*dot(mid.xyz,float3(1,1,1)); // -1 part

      pif(Output == OutputNormals)
        result = mid;
      pelse
      {
        // other samples
        float2 accu;      // x = weighted occlusion sum, y = weight sum
        float4 uva,uvb;

        uva   = uv0.xyxy + Offa[0];
        uvb   = uv0.xyxy + Offb[0];
        accu  = doSample(uva.xy,middot,Color[0].x);
        accu += doSample(uva.zw,middot,Color[0].y);
        accu += doSample(uvb.xy,middot,Color[0].z);
        accu += doSample(uvb.zw,middot,Color[0].w);

        uva   = uv0.xyxy + Offa[1];
        uvb   = uv0.xyxy + Offb[1];
        accu += doSample(uva.xy,middot,Color[1].x);
        accu += doSample(uva.zw,middot,Color[1].y);
        accu += doSample(uvb.xy,middot,Color[1].z);
        accu += doSample(uvb.zw,middot,Color[1].w);

        uva   = uv0.xyxy + Offa[2];
        uvb   = uv0.xyxy + Offb[2];
        accu += doSample(uva.xy,middot,Color[2].x);
        accu += doSample(uva.zw,middot,Color[2].y);
        accu += doSample(uvb.xy,middot,Color[2].z);
        accu += doSample(uvb.zw,middot,Color[2].w);

        uva   = uv0.xyxy + Offa[3];
        uvb   = uv0.xyxy + Offb[3];
        accu += doSample(uva.xy,middot,Color[3].x);
        accu += doSample(uva.zw,middot,Color[3].y);
        accu += doSample(uvb.xy,middot,Color[3].z);
        accu += doSample(uvb.zw,middot,Color[3].w);

        float ssaos = accu.x;
        float weight = accu.y;

        // With almost no accepted taps fall back to the centre value rather
        // than dividing by a tiny weight.
        if(weight > 1/64.0)
          ssaos /= weight;
        else
          ssaos = mid.w;

        // HACK (disabled): use just the centre tap
        //ssaos = tex2D(ssao,float2(uv0 + Offa[1].zw)).w;

        float4 scr = tex2D(screen,uv0);

        pif(Output == OutputResult)
        {
          result.w = scr.w;
          result.xyz = lerp(ScrColor.xyz,scr.xyz,ssaos);
        }
        pif(Output == OutputSSAO)   result = ssaos;
      }
    }
  }

  header
  {
    sInt Output;    // index into the Output permutation; 0 = OutputResult
  }
  new
  {
    Output = 0;
  }
  prepare
  {
    VertexShader = VS();
    PixelShader = PS(Output);
  }
};

// Per-channel colour balance: rgb = pow(saturate(rgb*Color0 + Color1), Color2).
// Alpha is passed through from the texture.
material Wz4IppColorBalance
{
  vs asc vs_3_0
  {
    use Wz4IppVSPara;
    void main
    (
      in float3 in_pos : POSITION,
      in float4 in_col : COLOR0,
      in float2 in_uv0 : TEXCOORD0,
      out float2 out_uv0 : TEXCOORD0,
      out float4 out_pos : POSITION,
    )
    {
      out_pos = mul(float4(in_pos,1),mvp);
      out_uv0 = in_uv0;
    }
  }
  ps asc ps_3_0
  {
    use Wz4IppPSPara;
    sampler2D s0 : register(s0);
    void main
    (
      in float2 uv0 : TEXCOORD0,
      out float4 r : COLOR0,
    )
    {
      float4 src = tex2D(s0,uv0);

      // Scale and bias, clamped to [0,1] so the power below stays defined.
      float3 balanced = saturate(src.rgb * Color0.rgb + Color1.rgb);

      r.a = src.a;
      r.rgb = pow(balanced,Color2.rgb);
    }
  }
};

 
// Sawtooth colour mapping in three steps: repeat (rgb*Color0 modulo 1),
// contrast around Color2 with gain Color3, then a per-channel power Color1.
// Alpha is passed through from the texture.
material Wz4IppColorSaw
{
  vs asc vs_3_0
  {
    use Wz4IppVSPara;
    void main
    (
      in float3 in_pos : POSITION,
      in float4 in_col : COLOR0,
      in float2 in_uv0 : TEXCOORD0,
      out float2 out_uv0 : TEXCOORD0,
      out float4 out_pos : POSITION,
    )
    {
      float4 pos4 = float4(in_pos,1);
      out_uv0 = in_uv0;
      out_pos = mul(pos4,mvp);
    }
  }
  ps asc ps_3_0
  {
    use Wz4IppPSPara;
    sampler2D s0 : register(s0);
    void main
    (
      in float2 uv0 : TEXCOORD0,
      out float4 r : COLOR0,
    )
    {
      float4 src = tex2D(s0,uv0);

      // repeat and modulo
      float3 saw = (src.xyz * Color0.xyz) % 1;
      // threshold
      saw = saturate( (saw-Color2.xyz ) * Color3.xyz+Color2.xyz);

      r.w = src.w;
      // power
      r.xyz = pow(saw, Color1.xyz);
    }
  }
};

// Colour grading. The input colour (with w forced to 1) is transformed by a
// 3x4 matrix given as three rows per tonal range.
//   UseSMH   - blend separate shadow / midtone / highlight matrices by
//              luminance; otherwise only the midtone matrix is used.
//   UseGamma - apply a per-channel power afterwards.
// Alpha is passed through from the texture.
material Wz4IppColorGrade
{
  header
  {
    sBool UseSMH;
    sBool UseGamma;
  }

  // Start with both options off, so prepare() never reads indeterminate
  // flags when the owner does not set them.
  new
  {
    UseSMH = sFALSE;
    UseGamma = sFALSE;
  }

  prepare
  {
    sInt ps = 0;
    if(UseSMH)
      ps |= Wz4IppColorGradePSPermMask_UseSMH;
    if(UseGamma)
      ps |= Wz4IppColorGradePSPermMask_UseGamma;

    VertexShader = VS();
    PixelShader = PS(ps);
  }

  vs asc vs_3_0
  {
    use Wz4IppVSPara;
    void main
    (
      in float3 in_pos : POSITION,
      in float4 in_col : COLOR0,
      in float2 in_uv0 : TEXCOORD0,
      out float4 out_pos : POSITION,
      out float2 out_uv0 : TEXCOORD0,
    )
    {
      out_uv0 = in_uv0;
      out_pos = mul(float4(in_pos,1),mvp);
    }
  }

  ps asc ps_3_0
  {
    use Wz4IppColorGradePSPerm;
    use Wz4IppColorGradePSPara;
    sampler2D s0 : register(s0);
    void main
    (
      in float2 uv0 : TEXCOORD0,
      out float4 result : COLOR0,
    )
    {
      float4 color = tex2D(s0,uv0);
      float alpha = color.a;
      // w = 1 makes the fourth element of every matrix row a constant offset.
      color.a=1;

      // The temporaries are float3: only three channels are ever written, so
      // a float4 would carry an uninitialised .a into the expressions below.
      float3 mid;
      mid.r = dot(color,MatrixMr);
      mid.g = dot(color,MatrixMg);
      mid.b = dot(color,MatrixMb);

      pif (UseSMH)
      {
        float3 sha;
        sha.r = dot(color,MatrixSr);
        sha.g = dot(color,MatrixSg);
        sha.b = dot(color,MatrixSb);

        float3 high;
        high.r = dot(color,MatrixHr);
        high.g = dot(color,MatrixHg);
        high.b = dot(color,MatrixHb);

        // Shadow weight falls off as luminance rises, highlight weight as it
        // drops; midtones receive the remainder.
        float lum = dot(color,LumWeights);
        float sweight = 1.0f-saturate(lum*Params1.x);
        float hweight = 1.0f-saturate((1.0f-lum)*Params1.y);
        float mweight = 1.0f-(sweight+hweight);

        result.rgb=saturate(sha*sweight+mid*mweight+high*hweight);
      }
      pelse
      {
        result.rgb=saturate(mid);
      }

      pif (UseGamma)
      {
        result.rgb = pow(result.rgb, Gamma.rgb);
      }
      result.a = alpha;

      /* TODO: implement me!
      // vignette
      float2 vpoint;
      vpoint.x=dot(uv0.xy,VignetteMatrix.xy)+VignetteShift.x;
      vpoint.y=dot(uv0.xy,VignetteMatrix.zw)+VignetteShift.y;
      
      float dist=sqrt(dot(vpoint.xy,vpoint.xy));
      
      
      result.rg=vpoint;
      result.b=(dist>1.0f);
      */
    }
  }
};


// Zoom blur: sums eight taps whose coordinates are scaled towards Color1.xy
// by 1 - Color1.zw*i/8 (i = 0..7), then multiplies the sum by Color0.xyz.
// Alpha is forced to 1.
material Wz4IppCrashZoom
{
  vs asc vs_3_0
  {
    use Wz4IppVSPara;
    void main
    (
      in float3 in_pos : POSITION,
      in float4 in_col : COLOR0,
      in float2 in_uv0 : TEXCOORD0,
      out float2 out_uv0 : TEXCOORD0,
      out float4 out_pos : POSITION,
    )
    {
      out_pos = mul(float4(in_pos,1),mvp);
      out_uv0 = in_uv0;
    }
  }
  ps asc ps_3_0
  {
    use Wz4IppPSPara;
    sampler2D s0 : register(s0);
    void main
    (
      in float2 uv0 : TEXCOORD0,
      out float4 r : COLOR0,
    )
    {
      float2 centre = Color1.xy;    // point the zoom converges on
      float2 amount = Color1.zw;    // zoom strength per axis
      float3 sum = 0;

      for(int i=0;i<8;i++)
      {
        float2 scale = 1-amount*i*0.125;
        float2 uv = (uv0-centre)*scale+centre;
        sum += tex2D(s0,uv).xyz;
      }

      r.w = 1;
      r.xyz = sum * Color0.xyz;
    }
  }
};

/****************************************************************************/

// 2D layer material: vertex colour times texture 0, optionally combined with
// a second texture (multiply, add or subtract) chosen by the T1 permutation.
material Layer2dMtrl
{
  vs
  {
    asc vs_3_0                 // hlsl code
    {
      use Wz4IppVSPara;
      void main
      (
        in float3 in_pos : POSITION,
        in float4 in_col : COLOR0,
        in float2 in_uv0 : TEXCOORD0,
        in float2 in_uv1 : TEXCOORD1,
        out float4 out_col : COLOR0,
        out float2 out_uv0 : TEXCOORD0,
        out float2 out_uv1 : TEXCOORD1,
        out float4 out_pos : POSITION, 
      )
      {
        // Colour and both coordinate sets are passed through unchanged.
        out_pos = mul(float4(in_pos,1),mvp);
        out_col = in_col;
        out_uv0 = in_uv0;
        out_uv1 = in_uv1;
      }
    }
  }

  ps
  {
    asc ps_3_0
    {
      permute Layer2dMtrlPSPerm
      {
        T1 { T1Off,T1Mul,T1Add,T1Sub };
      };
      use Layer2dMtrlPSPerm;

      sampler2D s0 : register(s0);
      sampler2D s1 : register(s1) : pif(T1);

      void main
      (
        in float4 color : COLOR0,
        in float2 uv0 : TEXCOORD0,
        in float2 uv1 : TEXCOORD1,
        out float4 result : COLOR0
      )
      {
        float4 base = tex2D(s0,uv0);

        pif(T1==T1Off) result = color *  base;
        pif(T1==T1Mul) result = color * (base * tex2D(s1,uv1));
        pif(T1==T1Add) result = color * (base + tex2D(s1,uv1));
        pif(T1==T1Sub) result = color * (base - tex2D(s1,uv1));
      }
    }    
  }

  header
  {
    sInt MixMode;   // 0 = multiply, 1 = add, 2 = subtract (maps to T1 value MixMode+1)
  }
  
  new
  {
    MixMode = 0;
  }

  prepare
  {
    // Without a second texture the T1Off permutation (0) is used.
    sInt perm = Texture[1] ? MixMode+1 : 0;

    VertexShader = VS();
    PixelShader = PS(perm); 
  }
};

/****************************************************************************/

// Crack fixer. Examines twelve neighbours of each pixel: the four direct
// ones, the four at distance two along the axes and the four diagonals at
// distance two. A neighbour "qualifies" when its w is smaller than the
// centre's w minus a threshold. If at least Color0.y neighbours qualify, the
// centre rgb is replaced by the average rgb of the qualifying neighbours.
//   Color0.x = threshold on w, Color0.y = required neighbour count
//   Color1.xy = UV step between neighbouring pixels
material Wz4IppCrackFixer
{
  vs asc vs_3_0
  {
    use Wz4IppVSPara;
    void main
    (
      in float3 in_pos : POSITION,
      in float4 in_col : COLOR0,
      in float2 in_uv0 : TEXCOORD0,
      out float2 out_uv0 : TEXCOORD0,
      out float4 out_pos : POSITION,
    )
    {
      out_uv0 = in_uv0;
      out_pos = mul(float4(in_pos,1),mvp);
    }
  }
  ps asc ps_3_0
  {
    use Wz4IppPSPara;
    sampler2D s0 : register(s0);
    void main
    (
      in float2 uv0 : TEXCOORD0,
      out float4 r : COLOR0,
    )
    {
      float4 x = tex2D(s0,uv0);
      float fu = Color1.x;
      float fv = Color1.y;
      float thresh = Color0.x;

      float4 na[4];   // direct neighbours
      float4 nb[4];   // two steps along an axis
      float4 nc[4];   // two steps along both axes (diagonals)

      na[0] = tex2D(s0,float2(uv0.x   ,uv0.y+fv));
      na[1] = tex2D(s0,float2(uv0.x   ,uv0.y-fv));
      na[2] = tex2D(s0,float2(uv0.x-fu,uv0.y   ));
      na[3] = tex2D(s0,float2(uv0.x+fu,uv0.y   ));
      nb[0] = tex2D(s0,float2(uv0.x     ,uv0.y+fv*2));
      nb[1] = tex2D(s0,float2(uv0.x     ,uv0.y-fv*2));
      nb[2] = tex2D(s0,float2(uv0.x-fu*2,uv0.y     ));
      nb[3] = tex2D(s0,float2(uv0.x+fu*2,uv0.y     ));
      nc[0] = tex2D(s0,float2(uv0.x+fu*2,uv0.y+fv*2));
      nc[1] = tex2D(s0,float2(uv0.x+fu*2,uv0.y-fv*2));
      nc[2] = tex2D(s0,float2(uv0.x-fu*2,uv0.y-fv*2));
      nc[3] = tex2D(s0,float2(uv0.x-fu*2,uv0.y+fv*2));

      float4 nna = float4(na[0].w,na[1].w,na[2].w,na[3].w);
      float4 nnb = float4(nb[0].w,nb[1].w,nb[2].w,nb[3].w);
      float4 nnc = float4(nc[0].w,nc[1].w,nc[2].w,nc[3].w);

      // Turn the w values into 0/1 flags: 1 where the neighbour qualifies.
      nna = nna<x.w-thresh; 
      nnb = nnb<x.w-thresh; 
      nnc = nnc<x.w-thresh; 

      // Number of qualifying neighbours (a whole number from 0 to 12).
      float nn = dot(nna,1)+dot(nnb,1)+dot(nnc,1);
 
      // Require at least one qualifying neighbour even when Color0.y is 0 or
      // negative; otherwise the average below would be 0/0. Because nn is a
      // whole number this does not change the result for any Color0.y > 0.
      if(nn>=max(Color0.y,1.0))
      {
        x.xyz = ( na[0].xyz*nna.x + na[1].xyz*nna.y + na[2].xyz*nna.z + na[3].xyz*nna.w
                + nb[0].xyz*nnb.x + nb[1].xyz*nnb.y + nb[2].xyz*nnb.z + nb[3].xyz*nnb.w
                + nc[0].xyz*nnc.x + nc[1].xyz*nnc.y + nc[2].xyz*nnc.z + nc[3].xyz*nnc.w ) / nn;
      }

      r = x;
    }
  }
};

/****************************************************************************/

material Wz4IppFXAA3Prepare // copy luminance into alpha
{
  vs asc vs_3_0
  {
    use Wz4IppVSPara;

    // Forwards uv0 and projects the position by mvp.
    void main
    (
      in float3 in_pos : POSITION,
      in float2 in_uv0 : TEXCOORD0,
      out float2 out_uv0 : TEXCOORD0,
      out float4 out_pos : POSITION,
    )
    {
      out_uv0 = in_uv0;
      out_pos = mul(float4(in_pos,1),mvp);
    }
  }

  ps asc ps_3_0
  {
    sampler2D s0 : register(s0);

    // Returns the source rgb unchanged, with alpha replaced by a weighted
    // sum of the rgb channels (weights 0.299 / 0.587 / 0.114).
    float4 main (in float2 pos : TEXCOORD0) : COLOR0
    {
      float4 src = tex2D(s0, pos);
      float luma = dot(src.rgb, float3(0.299, 0.587, 0.114));
      return float4(src.rgb, luma);
    }
  }
};

// FXAA-style edge anti-aliasing pass. The pixel shader reads luma from the
// w (alpha) channel of the source texture, so the input must already carry
// luma there.
material Wz4IppFXAA3
{
  vs asc vs_3_0
  {
    use Wz4IppVSPara;
    // Forwards uv0 and projects the position by mvp.
    void main
    (
      in float3 in_pos : POSITION,
      in float2 in_uv0 : TEXCOORD0,
      out float2 out_uv0 : TEXCOORD0,
      out float4 out_pos : POSITION,
    )
    {
      out_pos = mul(float4(in_pos,1),mvp);
      out_uv0 = in_uv0;
    }
  }
    
  ps asc ps_3_0
  {
    use Wz4IppFXAA3Para;
    sampler2D tex : register(s0);
        
    // Samples tex at mip level 0 at uv p displaced by offset o scaled by r.
    float4 FxaaTexOff(float2 p, float2 o, float2 r) 
    {
      return tex2Dlod(tex, float4(p + (o * r), 0, 0));
    }

    // Samples tex at mip level 0 at uv p.
    float4 FxaaTexTop(float2 p)
    {
      return tex2Dlod(tex, float4(p, 0, 0));
    }
    
    // Returns the anti-aliased rgb in xyz and the centre texel's luma in w.
    // rcpFrame.xy is used as the uv size of one step in x and y
    // (presumably 1/width and 1/height of the source - confirm with caller).
    float4 main( float2 pos : TEXCOORD0) : COLOR0
    {        
      // tuning constants
      const float FXAA_QUALITY__EDGE_THRESHOLD_MIN=(1.0/12.0);
      const float FXAA_QUALITY__EDGE_THRESHOLD=(1.0/6.0);
      const int   FXAA_SEARCH_STEPS=6;
      const float FXAA_SEARCH_THRESHOLD=(1.0/4.0);
      const float FXAA_QUALITY__SUBPIX_CAP=(3.0/4.0);
      const float FXAA_QUALITY__SUBPIX_TRIM=(1.0/4.0);
             
      // luma of the centre texel and its four axis-aligned neighbours
      float lumaN = FxaaTexOff(pos.xy, float2(0, -1), rcpFrame.xy).w;
      float lumaW = FxaaTexOff(pos.xy, float2(-1, 0), rcpFrame.xy).w;
      float4 rgbyM = FxaaTexTop(pos.xy);
      float lumaE = FxaaTexOff(pos.xy, float2( 1, 0), rcpFrame.xy).w;
      float lumaS = FxaaTexOff(pos.xy, float2( 0, 1), rcpFrame.xy).w;
      float lumaM = rgbyM.w;
      
      // local contrast: spread between darkest and brightest of the five lumas
      float rangeMin = min(lumaM, min(min(lumaN, lumaW), min(lumaS, lumaE)));
      float rangeMax = max(lumaM, max(max(lumaN, lumaW), max(lumaS, lumaE)));
      float range = rangeMax - rangeMin;

      // early out: contrast too low, return the centre texel untouched
      if(range < max(FXAA_QUALITY__EDGE_THRESHOLD_MIN, rangeMax * FXAA_QUALITY__EDGE_THRESHOLD))
        return rgbyM;
        
      // luma of the four diagonal neighbours
      float lumaNW = FxaaTexOff(pos.xy, float2(-1,-1), rcpFrame.xy).w;
      float lumaNE = FxaaTexOff(pos.xy, float2( 1,-1), rcpFrame.xy).w;
      float lumaSW = FxaaTexOff(pos.xy, float2(-1, 1), rcpFrame.xy).w;
      float lumaSE = FxaaTexOff(pos.xy, float2( 1, 1), rcpFrame.xy).w;

      // sub-pixel blend amount: how far the centre luma is from the average of
      // its four neighbours relative to the local contrast, trimmed and capped
      float lumaL = (lumaN + lumaW + lumaE + lumaS) * 0.25;
      float rangeL = abs(lumaL - lumaM);
      float blendL = saturate((rangeL / range) - FXAA_QUALITY__SUBPIX_TRIM) * (1.0/(1.0 - FXAA_QUALITY__SUBPIX_TRIM)); 
      blendL = min(FXAA_QUALITY__SUBPIX_CAP, blendL);

      // weighted sums of absolute second differences over the 3x3 block,
      // taken along rows (edgeVert) and along columns (edgeHorz)
      float edgeVert = 
                abs(lumaNW + (-2.0 * lumaN) + lumaNE) +
          2.0 * abs(lumaW  + (-2.0 * lumaM) + lumaE ) +
                abs(lumaSW + (-2.0 * lumaS) + lumaSE);
      float edgeHorz = 
                abs(lumaNW + (-2.0 * lumaW) + lumaSW) +
          2.0 * abs(lumaN  + (-2.0 * lumaM) + lumaS ) +
                abs(lumaNE + (-2.0 * lumaE) + lumaSE);
      bool horzSpan = edgeHorz >= edgeVert;

      // lengthSign: one step perpendicular to the span (y for horizontal
      // spans, x otherwise). From here on lumaN/lumaS hold the two
      // neighbours across the span (N/S or W/E).
      float lengthSign = horzSpan ? -rcpFrame.y : -rcpFrame.x;
      if(!horzSpan) lumaN = lumaW;
      if(!horzSpan) lumaS = lumaE;
      float gradientN = abs(lumaN - lumaM);
      float gradientS = abs(lumaS - lumaM);
      lumaN = (lumaN + lumaM) * 0.5;
      lumaS = (lumaS + lumaM) * 0.5;

      // keep the side with the larger gradient; lumaN becomes the average
      // luma of the centre and that neighbour, lengthSign points towards it
      bool pairN = gradientN >= gradientS;
      if(!pairN) lumaN = lumaS;
      if(!pairN) gradientN = gradientS;
      if(!pairN) lengthSign *= -1.0;
      // start position: half a step towards the chosen neighbour
      float2 posN;
      posN.x = pos.x + (horzSpan ? 0.0 : lengthSign * 0.5);
      posN.y = pos.y + (horzSpan ? lengthSign * 0.5 : 0.0);

      // from here on gradientN is the luma deviation that ends the search
      gradientN *= FXAA_SEARCH_THRESHOLD;

      // walk along the span in both directions (N = negative, P = positive),
      // starting 1.5 steps out and advancing 2 steps per iteration, until the
      // sampled luma deviates from lumaN by at least gradientN on both sides
      // or FXAA_SEARCH_STEPS iterations have run
      float2 posP = posN;
      float2 offNP = horzSpan ? float2(rcpFrame.x, 0.0) : float2(0.0f, rcpFrame.y); 
      float lumaEndN;
      float lumaEndP;
      bool doneN = false;
      bool doneP = false;
      posN += offNP * (-1.5);
      posP += offNP * ( 1.5);
      for(int i = 0; i < FXAA_SEARCH_STEPS; i++) {
          lumaEndN = FxaaTexTop(posN.xy).w;
          lumaEndP = FxaaTexTop(posP.xy).w;
          bool doneN2 = abs(lumaEndN - lumaN) >= gradientN;
          bool doneP2 = abs(lumaEndP - lumaN) >= gradientN;
          // when a side changes from not-done to done, pull its position back by one step
          if(doneN2 && !doneN) posN += offNP;
          if(doneP2 && !doneP) posP -= offNP;
          if(doneN2 && doneP2) break;
          doneN = doneN2;
          doneP = doneP2;
          if(!doneN) posN -= offNP * 2.0;
          if(!doneP) posP += offNP * 2.0; }

      // distance from the centre to each end position, measured along the span
      float dstN = horzSpan ? pos.x - posN.x : pos.y - posN.y;
      float dstP = horzSpan ? posP.x - pos.x : posP.y - pos.y;

      // from here on lumaEndN is the luma at the nearer end
      bool directionN = dstN < dstP;
      lumaEndN = directionN ? lumaEndN : lumaEndP;

      // no perpendicular shift when the centre luma and the nearer end luma
      // lie on the same side of lumaN
      if(((lumaM - lumaN) < 0.0) == ((lumaEndN - lumaN) < 0.0)) 
          lengthSign = 0.0;

      // offset = (0.5 - nearerDistance/spanLength + blendL/8) * lengthSign
      float spanLength = (dstP + dstN);
      dstN = directionN ? dstN : dstP;
      float subPixelOffset = 0.5 + (dstN * (-1.0/spanLength));
      subPixelOffset += blendL * (1.0/8.0);
      subPixelOffset *= lengthSign;
      // resample the source, shifted perpendicular to the span
      float3 rgbF = FxaaTexTop(float2(
          pos.x + (horzSpan ? 0.0 : subPixelOffset),
          pos.y + (horzSpan ? subPixelOffset : 0.0))).xyz;

      //lumaL *= lumaL;   uncomment for linear space
      // scale rgbF so its luma moves from lumaF towards lumaL by blendL; the
      // small constant keeps lumaF non-zero for the division and the scale is
      // limited to 4
      float lumaF = dot(rgbF, float3(0.299, 0.587, 0.114)) + (1.0/(65536.0*256.0));
      float lumaB = lerp(lumaF, lumaL, blendL);
      float scale = min(4.0, lumaB/lumaF);
      rgbF *= scale;
      return float4(rgbF, lumaM); 
    }
    
  }
};

/****************************************************************************/

material Wz4IppColorMath
{
  vs asc vs_3_0
  {
    use Wz4IppVSPara;

    // Forwards uv0 and projects the position by mvp; in_col is not used.
    void main
    (
      in float3 in_pos : POSITION,
      in float4 in_col : COLOR0,
      in float2 in_uv0 : TEXCOORD0,
      out float2 out_uv0 : TEXCOORD0,
      out float4 out_pos : POSITION,
    )
    {
      out_pos = mul(float4(in_pos,1),mvp);
      out_uv0 = in_uv0;
    }
  }
  ps asc ps_3_0
  {
    use Wz4IppPSPara;
    sampler2D s0 : register(s0);

    // result = saturate(source + Color0) * Color1, per channel
    void main
    (
      in float2 uv0 : TEXCOORD0,
      out float4 r : COLOR0,
    )
    {
      float4 biased = saturate(tex2D(s0,uv0)+Color0);
      r = biased*Color1;
    }
  }
};

/****************************************************************************/

material Wz4IppCustom
{
  vs asc vs_3_0
  {
    use Wz4IppVSCustomPara;

    // Forwards uv0 and projects the position by mvp.
    void main
    (
      in float3 in_pos : POSITION,
      in float2 in_uv0 : TEXCOORD0,
      out float2 out_uv0 : TEXCOORD0,
      out float4 out_pos : POSITION,
    )
    {
      out_pos = mul(float4(in_pos,1),mvp);
      out_uv0 = in_uv0;
    }
  }
  ps asc ps_3_0
  {
    use Wz4IppPSCustomPara;

    // Writes opaque red for every pixel; uv0 and the constant buffer are not
    // read. (Presumably a placeholder for user-supplied shader code - confirm.)
    void main
    (
      in float2 uv0 : TEXCOORD0,
      out float4 r : COLOR0
    )
    {
      r = float4(1.0, 0.0, 0.0, 1.0);
    }
  }
};
